#  
#  WordCompounder --- Executable file name: GoMusubi
#  Copyright(C) 2021 Kaoru Sagara and Syugo Nakamura 
#  This software is released under any of the GPL (see the file GPL), the LGPL(see the file LGPL), or the BSD License (see the file BSD).

from .mecab_parser import MecabParser
from components.utils.unicode import is_hiragana


class UnidicMecabParser(MecabParser):
  """
  Parser whose `_filter` hook rewrites the token list based on the
  part-of-speech labels '形状詞', '記号', '接尾辞' and '接頭辞'.
  """

  def _filter(self, result_list):
    """
    Post-process a MeCab parse result.

    Tokens are visited in order and the following rewrites are applied:

    * A token labelled '形状詞' or '記号' is relabelled as '名詞'.
    * A token that directly follows a '接頭辞' token is merged into that
      prefix's entry; the merged entry takes the following token's features.
    * A '接尾辞' token is appended to the surface of the entry before it;
      that entry keeps its own features.

    Merges always extend the surface already stored in the output entry, so
    chains such as prefix + noun + suffix or noun + suffix + suffix keep
    every piece of text.

    Args:
        result_list (list): Parse result. Each item unpacks into
            ``(surface, features)`` where ``features`` is a sequence whose
            first element is the part-of-speech label. The input feature
            lists are not modified.

    Returns:
        list: A new list of ``[surface, features]`` entries.
    """
    new_result_list = []
    last_surface = None
    last_pos = None

    for surface, features in result_list:
      if features[0] in ('形状詞', '記号'):
        # Relabel on a copy so the caller's feature list is left untouched.
        features = list(features)
        features[0] = '名詞'
      pos = features[0]

      if last_pos == '接頭辞':
        # The previous token was a prefix: fold this token into the prefix's
        # entry (never append it separately, which would duplicate the
        # prefix) and let the entry take this token's features.
        merged = new_result_list[-1]
        merged[0] += surface
        merged[1] = features
      elif pos == '接尾辞' and last_surface:
        # Suffix: extend the surface accumulated so far in the previous
        # entry, not just the previous raw token, so earlier merges survive.
        new_result_list[-1][0] += surface
      else:
        new_result_list.append([surface, features])

      # NOTE: joining consecutive non-hiragana tokens (katakana / kanji /
      # digits / latin letters) is not performed; that rule is disabled.

      last_surface = surface
      last_pos = pos

    return new_result_list
